1 Effect of UPSTM-Based Decorrelation on Feature Discovery

1.0.1 Loading the libraries

library("FRESA.CAD")
library(readxl)
library(igraph)
library(umap)
library(tsne)
library(entropy)
library(TH.data)
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)

1.1 Material and Methods

About Dataset Coronaviruses are a broad family of viruses that have been linked to illnesses ranging from the common cold to more serious conditions such as Middle East Respiratory Syndrome (MERS) and Severe Acute Respiratory Syndrome (SARS) (SARS. In 2019, a new coronavirus (COVID-19) was discovered in Wuhan, China.

Sometimes, When PCR test resources are scarce and antigen test kits are inaccurate, clinicians look for alternate COVID-19 testing methods that can be completed in a day and handle thousands of samples. COVID-19 virus proteins should be lacking in normal people’s saliva. Some distinct proteins may be produced in response to COVID-19 infection and can be utilized as a signature to identify potentially infected people. Therefore, protein profiles in a patient’s saliva can indicate that he or she is infected with COVID-19.

Mass spectrometry is a method for determining the protein composition of a material. Saliva samples from hundreds of patients were studied in this dataset. So, a machine learning specialist is approached and charged with developing a machine learning model that can identify who COVID-19 infected since the PCR test cannot interpret the result completely.

1.2 The Data

https://www.kaggle.com/datasets/kerneler/saliva-testing-dataset?select=COVID-19_MS_dataset_train.csv


COVID_19_MS <- read.csv("~/GitHub/LatentBiomarkers/Data/COVID19_MS/COVID-19_MS_dataset_train.csv")

colnames(COVID_19_MS) <- str_replace_all(colnames(COVID_19_MS),"\\.","_")
COVID_19_MS$Person_ID <- NULL
sampleID <- unique(COVID_19_MS$Sample_ID)
spectraID <- colnames(COVID_19_MS)[str_detect(colnames(COVID_19_MS),"X")]

avgCOVID19 <- NULL
class <- COVID_19_MS[!duplicated(COVID_19_MS$Sample_ID),"PCR_result"]
for (id in sampleID)
{
  avgCOVID19 <- rbind(avgCOVID19,apply(COVID_19_MS[COVID_19_MS$Sample_ID %in% id,spectraID],2,mean))
}

avgCOVID19 <- as.data.frame(avgCOVID19)
rownames(avgCOVID19) <- sampleID
avgCOVID19$class <- 1*(class=="pos")

pander::pander(table(avgCOVID19$class))
0 1
37 109

1.2.0.1 Standarize the names for the reporting

studyName <- "COVID19_MS"
dataframe <- avgCOVID19
outcome <- "class"
thro <- 0.80
TopVariables <- 10
cexheat = 0.25

1.3 Generaring the report

1.3.1 Libraries

Some libraries

library(psych)
library(whitening)
library("vioplot")
library("rpart")

1.3.2 Data specs

pander::pander(c(rows=nrow(dataframe),col=ncol(dataframe)-1))
rows col
146 2715
pander::pander(table(dataframe[,outcome]))
0 1
37 109

varlist <- colnames(dataframe)
varlist <- varlist[varlist != outcome]

largeSet <- length(varlist) > 1500 

1.3.3 Scaling the data

Scaling and removing near zero variance columns and highly co-linear(r>0.99999) columns


  ### Some global cleaning
  sdiszero <- apply(dataframe,2,sd) > 1.0e-16
  dataframe <- dataframe[,sdiszero]

  varlist <- colnames(dataframe)[colnames(dataframe) != outcome]
  tokeep <- c(as.character(correlated_Remove(dataframe,varlist,thr=0.99999)),outcome)
  dataframe <- dataframe[,tokeep]

  varlist <- colnames(dataframe)
  varlist <- varlist[varlist != outcome]
  
  iscontinous <- sapply(apply(dataframe,2,unique),length) > 5 ## Only variables with enough samples



dataframeScaled <- FRESAScale(dataframe,method="OrderLogit")$scaledData

1.4 The heatmap of the data

numsub <- nrow(dataframe)
if (numsub > 1000) numsub <- 1000


if (!largeSet)
{

  hm <- heatMaps(data=dataframeScaled[1:numsub,],
                 Outcome=outcome,
                 Scale=TRUE,
                 hCluster = "row",
                 xlab="Feature",
                 ylab="Sample",
                 srtCol=45,
                 srtRow=45,
                 cexCol=cexheat,
                 cexRow=cexheat
                 )
  par(op)
}

1.4.0.1 Correlation Matrix of the Data

The heat map of the data


if (!largeSet)
{

  par(cex=0.6,cex.main=0.85,cex.axis=0.7)
  #cormat <- Rfast::cora(as.matrix(dataframe[,varlist]),large=TRUE)
  cormat <- cor(dataframe[,varlist],method="pearson")
  cormat[is.na(cormat)] <- 0
  gplots::heatmap.2(abs(cormat),
                    trace = "none",
  #                  scale = "row",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Original Correlation",
                    cexRow = cexheat,
                    cexCol = cexheat,
                     srtCol=45,
                     srtRow=45,
                    key.title=NA,
                    key.xlab="|Pearson Correlation|",
                    xlab="Feature", ylab="Feature")
  diag(cormat) <- 0
  print(max(abs(cormat)))
}

1.5 The decorrelation


DEdataframe <- IDeA(dataframe,verbose=TRUE,thr=thro)
#> 
#>  Included: 2328 , Uni p: 0.004316151 , Uncorrelated Base: 1297 , Outcome-Driven Size: 0 , Base Size: 1297 
#> 
#> 
 1 <R=0.994,r=0.972,N=   27>, Top: 11( 1 )[ 1 : 11 Fa= 11 : 0.972 ]( 11 , 14 , 0 ),<|>Tot Used: 25 , Added: 14 , Zero Std: 0 , Max Cor: 0.970
#> 
 2 <R=0.970,r=0.960,N=   27>, Top: 5( 1 )[ 1 : 5 Fa= 14 : 0.960 ]( 4 , 5 , 11 ),<|>Tot Used: 33 , Added: 5 , Zero Std: 0 , Max Cor: 0.959
#> 
 3 <R=0.959,r=0.955,N=   27>, Top: 3( 3 )[ 1 : 3 Fa= 15 : 0.955 ]( 3 , 6 , 14 ),<|>Tot Used: 37 , Added: 6 , Zero Std: 0 , Max Cor: 0.954
#> 
 4 <R=0.954,r=0.952,N=   27>, Top: 1( 1 )[ 1 : 1 Fa= 15 : 0.952 ]( 1 , 1 , 15 ),<|>Tot Used: 38 , Added: 1 , Zero Std: 0 , Max Cor: 0.951
#> 
 5 <R=0.951,r=0.925,N=   31>, Top: 14( 1 )[ 1 : 14 Fa= 23 : 0.925 ]( 14 , 16 , 15 ),<|>Tot Used: 59 , Added: 16 , Zero Std: 0 , Max Cor: 0.925
#> 
 6 <R=0.925,r=0.913,N=   31>, Top: 15( 1 )[ 1 : 15 Fa= 33 : 0.913 ]( 15 , 17 , 23 ),<|>Tot Used: 85 , Added: 17 , Zero Std: 0 , Max Cor: 0.912
#> 
 7 <R=0.912,r=0.906,N=   31>, Top: 6( 2 )[ 1 : 6 Fa= 36 : 0.906 ]( 6 , 8 , 33 ),<|>Tot Used: 96 , Added: 8 , Zero Std: 0 , Max Cor: 0.905
#> 
 8 <R=0.905,r=0.903,N=   31>, Top: 3( 1 )[ 1 : 3 Fa= 38 : 0.903 ]( 3 , 3 , 36 ),<|>Tot Used: 101 , Added: 3 , Zero Std: 0 , Max Cor: 0.902
#> 
 9 <R=0.902,r=0.851,N=  105>, Top: 38( 1 )[ 1 : 38 Fa= 65 : 0.851 ]( 38 , 57 , 38 ),<|>Tot Used: 175 , Added: 57 , Zero Std: 0 , Max Cor: 0.885
#> 
 10 <R=0.885,r=0.843,N=  105>, Top: 8( 1 )[ 1 : 8 Fa= 70 : 0.843 ]( 8 , 9 , 65 ),<|>Tot Used: 188 , Added: 9 , Zero Std: 0 , Max Cor: 0.842
#> 
 11 <R=0.842,r=0.800,N=  145>, Top: 53( 1 )[ 1 : 53 Fa= 102 : 0.800 ]( 53 , 72 , 70 ),<|>Tot Used: 283 , Added: 72 , Zero Std: 0 , Max Cor: 0.871
#> 
 12 <R=0.871,r=0.800,N=  145>, Top: 3( 1 )[ 1 : 3 Fa= 105 : 0.800 ]( 3 , 4 , 102 ),<|>Tot Used: 289 , Added: 4 , Zero Std: 0 , Max Cor: 0.800
#> 
 13 <R=0.800,r=0.800,N=    0>
#> 
 [ 13 ], 0.7981248 Decor Dimension: 289 Nused: 289 . Cor to Base: 148 , ABase: 30 , Outcome Base: 0 
#> 
varlistc <- colnames(DEdataframe)[colnames(DEdataframe) != outcome]

pander::pander(sum(apply(dataframe[,varlist],2,var)))

23332

pander::pander(sum(apply(DEdataframe[,varlistc],2,var)))

3247

pander::pander(entropy(discretize(unlist(dataframe[,varlist]), 256)))

0.132

pander::pander(entropy(discretize(unlist(DEdataframe[,varlistc]), 256)))

0.324

1.5.1 The decorrelation matrix


if (!largeSet)
{

  par(cex=0.6,cex.main=0.85,cex.axis=0.7)
  
  UPSTM <- attr(DEdataframe,"UPSTM")
  
  gplots::heatmap.2(1.0*(abs(UPSTM)>0),
                    trace = "none",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Decorrelation matrix",
                    cexRow = cexheat,
                    cexCol = cexheat,
                   srtCol=45,
                   srtRow=45,
                    key.title=NA,
                    key.xlab="|Beta|>0",
                    xlab="Output Feature", ylab="Input Feature")
  
  par(op)
}

1.6 The heatmap of the decorrelated data

if (!largeSet)
{

  hm <- heatMaps(data=DEdataframe[1:numsub,],
                 Outcome=outcome,
                 Scale=TRUE,
                 hCluster = "row",
                 cexRow = cexheat,
                 cexCol = cexheat,
                 srtCol=45,
                 srtRow=45,
                 xlab="Feature",
                 ylab="Sample")
  par(op)
}

1.7 The correlation matrix after decorrelation

if (!largeSet)
{

  cormat <- cor(DEdataframe[,varlistc],method="pearson")
  cormat[is.na(cormat)] <- 0
  
  gplots::heatmap.2(abs(cormat),
                    trace = "none",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Correlation after IDeA",
                    cexRow = cexheat,
                    cexCol = cexheat,
                     srtCol=45,
                     srtRow=45,
                    key.title=NA,
                    key.xlab="|Pearson Correlation|",
                    xlab="Feature", ylab="Feature")
  
  par(op)
  diag(cormat) <- 0
  print(max(abs(cormat)))
}

1.8 U-MAP Visualization of features

1.8.1 The UMAP based on LASSO on Raw Data


if (nrow(dataframe) < 1000)
{
  classes <- unique(dataframe[1:numsub,outcome])
  raincolors <- rainbow(length(classes))
  names(raincolors) <- classes
  datasetframe.umap = umap(scale(dataframe[1:numsub,varlist]),n_components=2)
  plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: Original",t='n')
  text(datasetframe.umap$layout,labels=dataframe[1:numsub,outcome],col=raincolors[dataframe[1:numsub,outcome]+1])
}

1.8.2 The decorralted UMAP

if (nrow(dataframe) < 1000)
{

  datasetframe.umap = umap(scale(DEdataframe[1:numsub,varlistc]),n_components=2)
  plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: After IDeA",t='n')
  text(datasetframe.umap$layout,labels=DEdataframe[1:numsub,outcome],col=raincolors[DEdataframe[1:numsub,outcome]+1])
}

1.9 Univariate Analysis

1.9.1 Univariate



univarRAW <- uniRankVar(varlist,
               paste(outcome,"~1"),
               outcome,
               dataframe,
               rankingTest="AUC")

100 : X297_73 200 : X382_77 300 : X479_57 400 : X629_81 500 : X796_11
600 : X969_8 700 : X1173_74 800 : X1364_32 900 : X1599_26 1000 : X1848_42
1100 : X2201_1 1200 : X2498_63 1300 : X2860_34 1400 : X3257_2 1500 : X3669_6
1600 : X4208_08 1700 : X4836_61 1800 : X5379_65 1900 : X6237_78 2000 : X7115_25
2100 : X8025_19 2200 : X9106_13 2300 : X10504_79 2400 : X11887_21 2500 : X13358_42
2600 : X14760_46




univarDe <- uniRankVar(varlistc,
               paste(outcome,"~1"),
               outcome,
               DEdataframe,
               rankingTest="AUC",
               )

100 : X297_73 200 : X382_77 300 : X479_57 400 : La_X629_81 500 : X796_11
600 : La_X969_8 700 : X1173_74 800 : X1364_32 900 : X1599_26 1000 : X1848_42
1100 : X2201_1 1200 : X2498_63 1300 : X2860_34 1400 : X3257_2 1500 : X3669_6
1600 : X4208_08 1700 : X4836_61 1800 : X5379_65 1900 : La_X6237_78 2000 : X7115_25
2100 : X8025_19 2200 : X9106_13 2300 : X10504_79 2400 : X11887_21 2500 : X13358_42
2600 : X14760_46

1.9.2 Final Table


univariate_columns <- c("caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC")

##topfive
topvar <- c(1:length(varlist)) <= TopVariables
pander::pander(univarRAW$orderframe[topvar,univariate_columns])
  caseMean caseStd controlMean controlStd controlKSP ROCAUC
X301_02 1.0971 1.288 5.780 4.428 0.37901 0.901
X1714_44 0.9653 1.354 2.775 2.143 0.16574 0.876
X1752_66 0.4318 0.300 1.092 0.570 0.52591 0.868
X1539_08 0.4536 0.386 1.205 0.790 0.08336 0.866
X900_35 0.7958 0.696 2.402 2.230 0.03660 0.841
X440_87 1.8551 1.815 4.788 3.289 0.14184 0.839
X1522_71 0.5368 0.438 1.262 0.787 0.54259 0.836
X3325_02 39.9174 33.638 5.890 11.627 0.00130 0.833
X3428_53 0.0818 0.313 0.566 1.047 0.00325 0.829
X256_5 2.4015 2.773 8.994 9.942 0.00576 0.824


topLAvar <- univarDe$orderframe$Name[str_detect(univarDe$orderframe$Name,"La_")]
topLAvar <- unique(c(univarDe$orderframe$Name[topvar],topLAvar[1:as.integer(TopVariables/2)]))
finalTable <- univarDe$orderframe[topLAvar,univariate_columns]

theLaVar <- rownames(finalTable)[str_detect(rownames(finalTable),"La_")]

pander::pander(univarDe$orderframe[topLAvar,univariate_columns])
  caseMean caseStd controlMean controlStd controlKSP ROCAUC
X301_02 1.0971 1.2879 5.7798 4.4284 0.37901 0.901
X1714_44 0.9653 1.3543 2.7750 2.1432 0.16574 0.876
X1752_66 0.4318 0.3003 1.0915 0.5697 0.52591 0.868
X1539_08 0.4536 0.3864 1.2050 0.7901 0.08336 0.866
X900_35 0.7958 0.6958 2.4018 2.2300 0.03660 0.841
X440_87 1.8551 1.8155 4.7878 3.2888 0.14184 0.839
X1522_71 0.5368 0.4383 1.2620 0.7869 0.54259 0.836
X3428_53 0.0818 0.3132 0.5656 1.0472 0.00325 0.829
X256_5 2.4015 2.7730 8.9943 9.9421 0.00576 0.824
X2870_03 0.6581 0.8780 1.5602 1.0686 0.75629 0.821
La_X7409_15 -0.0162 0.0816 -0.0638 0.0568 0.56572 0.757
La_X2962_99 0.2415 0.2010 0.0423 0.4892 0.05789 0.733
La_X3325_02 4.0996 5.4444 1.3749 2.1974 0.12652 0.723
La_X2472_01 0.3016 0.3318 0.7323 0.5305 0.47880 0.723
La_X2671_39 0.5193 0.6291 0.8706 0.8453 0.02388 0.722

dc <- getLatentCoefficients(DEdataframe)
fscores <- attr(DEdataframe,"fscore")

theSigDc <- dc[theLaVar]
names(theSigDc) <- NULL
theSigDc <- unique(names(unlist(theSigDc)))


theFormulas <- dc[rownames(finalTable)]
deFromula <- character(length(theFormulas))
names(deFromula) <- rownames(finalTable)

pander::pander(c(mean=mean(sapply(dc,length)),total=length(dc),fraction=length(dc)/(ncol(dataframe)-1)))
mean total fraction
2.01 211 0.0795


allSigvars <- names(dc)



dx <- names(deFromula)[1]
for (dx in names(deFromula))
{
  coef <- theFormulas[[dx]]
  cname <- names(theFormulas[[dx]])
  names(cname) <- cname
  for (cf in names(coef))
  {
    if (cf != dx)
    {
      if (coef[cf]>0)
      {
        deFromula[dx] <- paste(deFromula[dx],
                               sprintf("+ %5.3f*%s",coef[cf],cname[cf]))
      }
      else
      {
        deFromula[dx] <- paste(deFromula[dx],
                               sprintf("%5.3f*%s",coef[cf],cname[cf]))
      }
    }
  }
}

finalTable <- rbind(finalTable,univarRAW$orderframe[theSigDc[!(theSigDc %in% rownames(finalTable))],univariate_columns])


orgnamez <- rownames(finalTable)
orgnamez <- str_remove_all(orgnamez,"La_")
finalTable$RAWAUC <- univarRAW$orderframe[orgnamez,"ROCAUC"]
finalTable$DecorFormula <- deFromula[rownames(finalTable)]
finalTable$fscores <- fscores[rownames(finalTable)]

Final_Columns <- c("DecorFormula","caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC","RAWAUC","fscores")

finalTable <- finalTable[order(-finalTable$ROCAUC),]
pander::pander(finalTable[,Final_Columns])
  DecorFormula caseMean caseStd controlMean controlStd controlKSP ROCAUC RAWAUC fscores
X301_02 1.09708 1.2879 5.7798 4.4284 3.79e-01 0.901 0.901 NA
X1714_44 0.96530 1.3543 2.7750 2.1432 1.66e-01 0.876 0.876 NA
X1752_66 0.43183 0.3003 1.0915 0.5697 5.26e-01 0.868 0.868 NA
X1539_08 0.45356 0.3864 1.2050 0.7901 8.34e-02 0.866 0.866 NA
X900_35 0.79584 0.6958 2.4018 2.2300 3.66e-02 0.841 0.841 NA
X440_87 1.85509 1.8155 4.7878 3.2888 1.42e-01 0.839 0.839 1
X1522_71 0.53682 0.4383 1.2620 0.7869 5.43e-01 0.836 0.836 NA
X3325_02 NA 39.91739 33.6384 5.8901 11.6274 1.30e-03 0.833 0.833 NA
X3428_53 0.08181 0.3132 0.5656 1.0472 3.25e-03 0.829 0.829 NA
X256_5 2.40151 2.7730 8.9943 9.9421 5.76e-03 0.824 0.824 4
X2870_03 0.65813 0.8780 1.5602 1.0686 7.56e-01 0.821 0.821 NA
X3283_6 NA 7.93877 7.5390 1.0008 2.1724 6.86e-04 0.812 0.812 5
X3679_57 NA 0.16483 0.4102 0.3623 0.3880 4.48e-03 0.801 0.801 2
La_X7409_15 -0.293X3679_57 + 1.000X7409_15 -0.01621 0.0816 -0.0638 0.0568 5.66e-01 0.757 0.537 -1
La_X2962_99 + 1.000X2962_99 -0.811X5722_3 0.24148 0.2010 0.0423 0.4892 5.79e-02 0.733 0.404 0
La_X3325_02 -4.512X3283_6 + 1.000X3325_02 4.09958 5.4444 1.3749 2.1974 1.27e-01 0.723 0.833 -1
La_X2472_01 -6.670X606_79 + 1.000X2472_01 0.30158 0.3318 0.7323 0.5305 4.79e-01 0.723 0.696 -1
La_X2671_39 -6.958X877_66 + 1.000X2671_39 0.51927 0.6291 0.8706 0.8453 2.39e-02 0.722 0.699 -1
X2671_39 NA 0.58852 0.5979 1.2166 2.2963 6.50e-05 0.699 0.699 NA
X2472_01 NA 0.40507 0.2742 0.9290 1.2921 1.05e-02 0.696 0.696 NA
X5722_3 NA 0.06519 0.0785 0.5365 1.1121 1.31e-03 0.662 0.662 3
X7409_15 NA 0.03202 0.1558 0.0422 0.1033 2.84e-05 0.537 0.537 NA
X877_66 NA 0.00995 0.0335 0.0497 0.3025 9.77e-10 0.460 0.460 2
X606_79 NA 0.01552 0.0425 0.0295 0.1575 4.02e-09 0.448 0.448 2
X2962_99 NA 0.29435 0.1837 0.4774 1.0962 1.23e-05 0.404 0.404 NA

1.10 Comparing IDeA vs PCA vs EFA

1.10.1 PCA

featuresnames <- colnames(dataframe)[colnames(dataframe) != outcome]
pc <- prcomp(dataframe[,iscontinous],center = TRUE,tol=0.002)   #principal components
predPCA <- predict(pc,dataframe[,iscontinous])
PCAdataframe <- as.data.frame(cbind(predPCA,dataframe[,!iscontinous]))
colnames(PCAdataframe) <- c(colnames(predPCA),colnames(dataframe)[!iscontinous]) 
#plot(PCAdataframe[,colnames(PCAdataframe)!=outcome],col=dataframe[,outcome],cex=0.65,cex.lab=0.5,cex.axis=0.75,cex.sub=0.5,cex.main=0.75)

#pander::pander(pc$rotation)


PCACor <- cor(PCAdataframe[,colnames(PCAdataframe) != outcome])


  gplots::heatmap.2(abs(PCACor),
                    trace = "none",
  #                  scale = "row",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "PCA Correlation",
                    cexRow = 0.5,
                    cexCol = 0.5,
                     srtCol=45,
                     srtRow= -45,
                    key.title=NA,
                    key.xlab="Pearson Correlation",
                    xlab="Feature", ylab="Feature")

1.10.2 EFA


EFAdataframe <- dataframeScaled

if (length(iscontinous) < 2000)
{
  topred <- min(length(iscontinous),nrow(dataframeScaled),ncol(predPCA)/2)
  if (topred < 2) topred <- 2
  
  uls <- fa(dataframeScaled[,iscontinous],nfactors=topred,rotate="varimax",warnings=FALSE)  # EFA analysis
  predEFA <- predict(uls,dataframeScaled[,iscontinous])
  EFAdataframe <- as.data.frame(cbind(predEFA,dataframeScaled[,!iscontinous]))
  colnames(EFAdataframe) <- c(colnames(predEFA),colnames(dataframeScaled)[!iscontinous]) 


  
  EFACor <- cor(EFAdataframe[,colnames(EFAdataframe) != outcome])
  
  
    gplots::heatmap.2(abs(EFACor),
                      trace = "none",
    #                  scale = "row",
                      mar = c(5,5),
                      col=rev(heat.colors(5)),
                      main = "EFA Correlation",
                      cexRow = 0.5,
                      cexCol = 0.5,
                       srtCol=45,
                       srtRow= -45,
                      key.title=NA,
                      key.xlab="Pearson Correlation",
                      xlab="Feature", ylab="Feature")
}

1.11 Effect on CAR modeling

par(op)
par(xpd = TRUE)
dataframe[,outcome] <- factor(dataframe[,outcome])
rawmodel <- rpart(paste(outcome,"~."),dataframe,control=rpart.control(maxdepth=3))
pr <- predict(rawmodel,dataframe,type = "class")

  ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
  if (length(unique(pr))>1)
  {
    plot(rawmodel,main="Raw",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
    text(rawmodel, use.n = TRUE,cex=0.75)
    ptab <- epiR::epi.tests(table(pr==0,dataframe[,outcome]==0))
  }


pander::pander(table(dataframe[,outcome],pr))
  0 1
0 31 6
1 4 105
pander::pander(ptab)
  • detail:

    statistic est lower upper
    ap 0.7603 0.6827 0.8270
    tp 0.7466 0.6680 0.8149
    se 0.9633 0.9087 0.9899
    sp 0.8378 0.6799 0.9381
    diag.ac 0.9315 0.8776 0.9667
    diag.or 135.6250 35.9750 511.3030
    nndx 1.2482 1.0776 1.6990
    youden 0.8011 0.5886 0.9280
    pv.pos 0.9459 0.8861 0.9799
    pv.neg 0.8857 0.7326 0.9680
    lr.pos 5.9404 2.8532 12.3678
    lr.neg 0.0438 0.0166 0.1158
    p.rout 0.2397 0.1730 0.3173
    p.rin 0.7603 0.6827 0.8270
    p.tpdn 0.1622 0.0619 0.3201
    p.tndp 0.0367 0.0101 0.0913
    p.dntp 0.0541 0.0201 0.1139
    p.dptn 0.1143 0.0320 0.2674
  • tab:

      Outcome + Outcome - Total
    Test + 105 6 111
    Test - 4 31 35
    Total 109 37 146
  • method: exact

  • digits: 2

  • conf.level: 0.95

pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.932 0.878 0.967
3 se 0.963 0.909 0.990
4 sp 0.838 0.680 0.938
6 diag.or 135.625 35.975 511.303

par(op)
par(xpd = TRUE)
DEdataframe[,outcome] <- factor(DEdataframe[,outcome])
IDeAmodel <- rpart(paste(outcome,"~."),DEdataframe,control=rpart.control(maxdepth=3))
pr <- predict(IDeAmodel,DEdataframe,type = "class")

  ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
  if (length(unique(pr))>1)
  {
    plot(IDeAmodel,main="IDeA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
    text(IDeAmodel, use.n = TRUE,cex=0.75)
    ptab <- epiR::epi.tests(table(pr==0,DEdataframe[,outcome]==0))
  }

pander::pander(table(DEdataframe[,outcome],pr))
  0 1
0 31 6
1 3 106
pander::pander(ptab)
  • detail:

    statistic est lower upper
    ap 0.7671 0.69007 0.8330
    tp 0.7466 0.66800 0.8149
    se 0.9725 0.92167 0.9943
    sp 0.8378 0.67986 0.9381
    diag.ac 0.9384 0.88621 0.9714
    diag.or 182.5556 43.14327 772.4619
    nndx 1.2341 1.07254 1.6624
    youden 0.8103 0.60153 0.9324
    pv.pos 0.9464 0.88704 0.9801
    pv.neg 0.9118 0.76322 0.9814
    lr.pos 5.9969 2.88107 12.4826
    lr.neg 0.0328 0.01067 0.1012
    p.rout 0.2329 0.16698 0.3099
    p.rin 0.7671 0.69007 0.8330
    p.tpdn 0.1622 0.06193 0.3201
    p.tndp 0.0275 0.00571 0.0783
    p.dntp 0.0536 0.01991 0.1130
    p.dptn 0.0882 0.01858 0.2368
  • tab:

      Outcome + Outcome - Total
    Test + 106 6 112
    Test - 3 31 34
    Total 109 37 146
  • method: exact

  • digits: 2

  • conf.level: 0.95

pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.938 0.886 0.971
3 se 0.972 0.922 0.994
4 sp 0.838 0.680 0.938
6 diag.or 182.556 43.143 772.462

par(op)
par(xpd = TRUE)
PCAdataframe[,outcome] <- factor(PCAdataframe[,outcome])
PCAmodel <- rpart(paste(outcome,"~."),PCAdataframe,control=rpart.control(maxdepth=3))
pr <- predict(PCAmodel,PCAdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
  plot(PCAmodel,main="PCA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
  text(PCAmodel, use.n = TRUE,cex=0.75)
  ptab <- epiR::epi.tests(table(pr==0,PCAdataframe[,outcome]==0))
}

pander::pander(table(PCAdataframe[,outcome],pr))
  0 1
0 25 12
1 2 107
pander::pander(ptab)
  • detail:

    statistic est lower upper
    ap 0.8151 0.74246 0.8744
    tp 0.7466 0.66800 0.8149
    se 0.9817 0.93529 0.9978
    sp 0.6757 0.50215 0.8199
    diag.ac 0.9041 0.84435 0.9466
    diag.or 111.4583 23.44538 529.8682
    nndx 1.5213 1.22304 2.2861
    youden 0.6573 0.43743 0.8176
    pv.pos 0.8992 0.83048 0.9468
    pv.neg 0.9259 0.75710 0.9909
    lr.pos 3.0268 1.89972 4.8224
    lr.neg 0.0272 0.00676 0.1092
    p.rout 0.1849 0.12555 0.2575
    p.rin 0.8151 0.74246 0.8744
    p.tpdn 0.3243 0.18014 0.4979
    p.tndp 0.0183 0.00223 0.0647
    p.dntp 0.1008 0.05320 0.1695
    p.dptn 0.0741 0.00910 0.2429
  • tab:

      Outcome + Outcome - Total
    Test + 107 12 119
    Test - 2 25 27
    Total 109 37 146
  • method: exact

  • digits: 2

  • conf.level: 0.95

pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.904 0.844 0.947
3 se 0.982 0.935 0.998
4 sp 0.676 0.502 0.820
6 diag.or 111.458 23.445 529.868


par(op)

1.11.1 EFA


  EFAdataframe[,outcome] <- factor(EFAdataframe[,outcome])
  EFAmodel <- rpart(paste(outcome,"~."),EFAdataframe,control=rpart.control(maxdepth=3))
  pr <- predict(EFAmodel,EFAdataframe,type = "class")
  
  ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
  if (length(unique(pr))>1)
  {
    plot(EFAmodel,main="EFA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
    text(EFAmodel, use.n = TRUE,cex=0.75)
    ptab <- epiR::epi.tests(table(pr==0,EFAdataframe[,outcome]==0))
  }


  pander::pander(table(EFAdataframe[,outcome],pr))
  0 1
0 31 6
1 4 105
  pander::pander(ptab)
  • detail:

    statistic est lower upper
    ap 0.7603 0.6827 0.8270
    tp 0.7466 0.6680 0.8149
    se 0.9633 0.9087 0.9899
    sp 0.8378 0.6799 0.9381
    diag.ac 0.9315 0.8776 0.9667
    diag.or 135.6250 35.9750 511.3030
    nndx 1.2482 1.0776 1.6990
    youden 0.8011 0.5886 0.9280
    pv.pos 0.9459 0.8861 0.9799
    pv.neg 0.8857 0.7326 0.9680
    lr.pos 5.9404 2.8532 12.3678
    lr.neg 0.0438 0.0166 0.1158
    p.rout 0.2397 0.1730 0.3173
    p.rin 0.7603 0.6827 0.8270
    p.tpdn 0.1622 0.0619 0.3201
    p.tndp 0.0367 0.0101 0.0913
    p.dntp 0.0541 0.0201 0.1139
    p.dptn 0.1143 0.0320 0.2674
  • tab:

      Outcome + Outcome - Total
    Test + 105 6 111
    Test - 4 31 35
    Total 109 37 146
  • method: exact

  • digits: 2

  • conf.level: 0.95

  pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.932 0.878 0.967
3 se 0.963 0.909 0.990
4 sp 0.838 0.680 0.938
6 diag.or 135.625 35.975 511.303
  par(op)